#Load necessary libraries

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(broom)
## Warning: package 'broom' was built under R version 4.0.5
library(dplyr)

#Read in files
covid_data_cap <- read.csv("/Users/lorenzocapitani/OneDrive\ -\ Cardiff\ University/Bioinformatics/MartinPaper/April2022_Cap.csv")
covid_data_vein <- read.csv("/Users/lorenzocapitani/OneDrive\ -\ Cardiff\ University/Bioinformatics/MartinPaper/April2022_Vein.csv")

#Drop unneeded columns
covid_data_cap <- covid_data_cap[,-c(6,7,16:21)]
covid_data_vein <- covid_data_vein[,-c(4,6,8,10)]
#Clean dataset: convert binary factors to 1 or 0 
covid_data_cap$COVID.19.Positive. <- ifelse(test=covid_data_cap$COVID.19.Positive. == "Y", yes=1, no=0)
covid_data_cap$Vaccinated. <- ifelse(test=covid_data_cap$Vaccinated. == "Y", yes=1, no=0)
covid_data_cap$Prior.COVID.19. <- ifelse(test=covid_data_cap$Prior.COVID.19. == "Y", yes=1, no=0)
covid_data_cap[covid_data_cap$Gender == "male",]$Gender <- 0
covid_data_cap[covid_data_cap$Gender == "female",]$Gender <- 1
covid_data_cap$Gender <- as.integer(covid_data_cap$Gender)
covid_data_cap$Significant.co.morbidity. <- ifelse(test=covid_data_cap$Significant.co.morbidity. == "Y", yes=1, no=0)
covid_data_cap$Ethnicity..Y...BAME. <- ifelse(test=covid_data_cap$Ethnicity..Y...BAME. == "Y", yes=1, no=0)

#Repeat for venous dataset
covid_data_vein$COVID.19.Positive. <- ifelse(test=covid_data_vein$COVID.19.Positive. == "Y", yes=1, no=0)
covid_data_vein$Vaccinated. <- ifelse(test=covid_data_vein$Vaccinated. == "Y", yes=1, no=0)
covid_data_vein$Prior.COVID.19. <- ifelse(test=covid_data_vein$Prior.COVID.19. == "Y", yes=1, no=0)
covid_data_vein[covid_data_vein$Gender == "M",]$Gender <- 0
covid_data_vein[covid_data_vein$Gender == "F",]$Gender <- 1
covid_data_vein$Gender <- as.integer(covid_data_vein$Gender)

Correlation plotting before mean interpolation Correlation matrices from Figures 1 and 2

library(corrplot)
## corrplot 0.92 loaded
# Which parameters to include
numeric_covid_cap <- covid_data_cap[,c(2,3,4,5,6,7,8,9,10,11,12,13)]
numeric_covid_vein <- covid_data_vein[,c(2,3,4,5,6,7,8,9,10,11)]
covid_vein_cor_matrix <- cor(numeric_covid_vein, use = "complete.obs", method = "spearman")
covid_cap_cor_matrix <- cor(numeric_covid_cap, use = "complete.obs", method = "spearman")

#Obtain p-value matrix for each comparison and fix for multiple comparisons 
testRes_vein = cor.mtest(numeric_covid_vein, conf.level = 0.95,  method = "spearman", adjust="holm", exact = F)
testRes_cap = cor.mtest(numeric_covid_cap, conf.level = 0.95,  method = "spearman", adjust="holm", exact = F)

corrplot(covid_vein_cor_matrix, p.mat = testRes_vein$p, sig.level = 0.05, method = 'square', order="original", tl.cex = 2, tl.col = "black", insig = "blank", type = 'lower') 
title(main = "Vein dataset", cex.main =2, line = 0)

corrplot(covid_cap_cor_matrix, p.mat = testRes_cap$p, sig.level = 0.05, method = 'square', order="original", tl.cex = 2, tl.col = "black", insig = "blank", type = 'lower') 
title(main = "Capillary dataset", cex.main =2, line = 0)

Mean interpolation of few missing data points cprior to modelling

#A visual take on the missing values 
library(Amelia)
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.0.5
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.8.0, built: 2021-05-26)
## ## Copyright (C) 2005-2022 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
missmap(covid_data_vein, main = "Missing values vs observed in vein dataset")

missmap(covid_data_cap, main = "Missing values vs observed in cap dataset")

# Mean interpolation for the few missing values present for vein dataset
covid_data_vein$RBD.IgG..BAU.ml.[is.na(covid_data_vein$RBD.IgG..BAU.ml.)] <- mean(covid_data_vein$RBD.IgG..BAU.ml.,na.rm=T)
covid_data_vein$S1.IgG..BAU.ml.[is.na(covid_data_vein$S1.IgG..BAU.ml.)] <- mean(covid_data_vein$S1.IgG..BAU.ml., na.rm=T)
covid_data_vein$S2.IgG..BAU.ml.[is.na(covid_data_vein$S2.IgG..BAU.ml.)] <- mean(covid_data_vein$S2.IgG..BAU.ml., na.rm = T)
covid_data_vein$N.IgG..BAU.ml.[is.na(covid_data_vein$N.IgG..BAU.ml.)] <- mean(covid_data_vein$N.IgG..BAU.ml., na.rm=T)

# Mean interpolation for the few missing values present for cap dataset
covid_data_cap$Age[is.na(covid_data_cap$Age)] <- mean(covid_data_cap$Age,na.rm=T)
covid_data_cap$IFNG[is.na(covid_data_cap$IFNG)] <- mean(covid_data_cap$IFNG,na.rm=T)
covid_data_cap$RBD.IgG..BAU.ml.[is.na(covid_data_cap$RBD.IgG..BAU.ml.)] <- mean(covid_data_cap$RBD.IgG..BAU.ml., na.rm = T)
covid_data_cap$N.IgG..BAU.ml.[is.na(covid_data_cap$N.IgG..BAU.ml.)] <- mean(covid_data_cap$N.IgG..BAU.ml., na.rm = T)
covid_data_cap$S1.IgG..BAU.ml.[is.na(covid_data_cap$S1.IgG..BAU.ml.)] <- mean(covid_data_cap$S1.IgG..BAU.ml., na.rm = T)
covid_data_cap$S2.IgG..BAU.ml.[is.na(covid_data_cap$S2.IgG..BAU.ml.)] <- mean(covid_data_cap$S2.IgG..BAU.ml., na.rm=T)
# Drop row with missing gender value
covid_data_cap <- covid_data_cap[!is.na(covid_data_cap$Gender),]

missmap(covid_data_vein, main = "Missing values vs observed in vein dataset")

missmap(covid_data_cap, main = "Missing values vs observed in cap dataset")

Preliminary univariate analysis of venous dataset - not included in study

library(ggplot2)
library(ggbeeswarm)
library(ggpubr)

numeric_covid_vein <- covid_data_vein[,c(2,3,4,5,6,7,8,9,10,11)]
numeric_covid_vein$COVID.19.Positive. <- factor(numeric_covid_vein$COVID.19.Positive.)
numeric_covid_vein$Vaccinated. <- factor(numeric_covid_vein$Vaccinated.)
numeric_covid_vein$Prior.COVID.19. <- factor(numeric_covid_vein$Prior.COVID.19.)
numeric_covid_vein$Gender <- factor(numeric_covid_vein$Gender)


ggplot(numeric_covid_vein, aes(x = COVID.19.Positive., y = IFNg)) +
  geom_violin() +
  geom_quasirandom(width = 0.2) +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("IFNg (pg/ml)") +
  stat_compare_means(label.x.npc = 0.4)

ggplot(numeric_covid_vein, aes(x = COVID.19.Positive., y = RBD.IgG..BAU.ml.)) +
  geom_violin() +
  geom_quasirandom(width = 0.2) +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Ant-RBD IgG titres") +
  stat_compare_means(label.x.npc = 0.4)

ggplot(numeric_covid_vein, aes(x = COVID.19.Positive., y = N.IgG..BAU.ml.)) +
  geom_violin() +
  geom_quasirandom(width = 0.2) +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Ant-RBD IgG titres") +
  stat_compare_means(label.x.npc = 0.4)

ggplot(numeric_covid_vein, aes(x = COVID.19.Positive., y = S1.IgG..BAU.ml.)) +
  geom_violin() +
  geom_quasirandom(width = 0.2) +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Ant-S1 IgG titres") +
  stat_compare_means(label.x.npc = 0.4)

ggplot(numeric_covid_vein, aes(x = COVID.19.Positive., y = S2.IgG..BAU.ml.)) +
  geom_violin() +
  geom_quasirandom(width = 0.2) +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Ant-S2 IgG titres") +
  stat_compare_means(label.x.npc = 0.4)

ggplot(numeric_covid_vein, aes(x = COVID.19.Positive., fill = Vaccinated.)) +
  geom_bar(stat ='count',position = "dodge2") +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Count") +
  labs(fill = "Vaccinated") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("brown3", "darkgoldenrod1")) 

ggplot(numeric_covid_vein, aes(x = COVID.19.Positive., fill = Prior.COVID.19.)) +
  geom_bar(stat ='count',position = "dodge2") +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Count") +
  labs(fill = "Prior COVID infection") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("brown3", "darkgoldenrod1")) 

ggplot(numeric_covid_vein, aes(x = COVID.19.Positive., fill = Gender)) +
  geom_bar(stat ='count',position = "dodge2") +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Count") +
  labs(fill = "Gender") +
  scale_fill_manual(labels = c("Male", "Female"), values = c("brown3", "darkgoldenrod1")) 

ggplot(numeric_covid_vein, aes(x = COVID.19.Positive., y = Age)) +
    geom_violin() +
  geom_quasirandom(width = 0.2) +
  xlab("Breakthrough COVID19 infection") +
  ylab("Age") +
    scale_x_discrete(labels = c("No", "Yes"))+
  stat_compare_means(label.x.npc = 0.4)

Preliminary univariate analysis of capillary dataset - not included in study

numeric_covid_cap <-  covid_data_cap[,c(2,3,4,5,6,7,8,9,10,11,12,13)]
numeric_covid_cap$COVID.19.Positive. <- factor(numeric_covid_cap $COVID.19.Positive.)
numeric_covid_cap$Vaccinated. <- factor(numeric_covid_cap $Vaccinated.)
numeric_covid_cap$Prior.COVID.19. <- factor(numeric_covid_cap $Prior.COVID.19.)
numeric_covid_cap$Gender <- factor(numeric_covid_cap $Gender)
numeric_covid_cap$Significant.co.morbidity. <- factor(numeric_covid_cap$Significant.co.morbidity.)
numeric_covid_cap$Ethnicity..Y...BAME. <- factor(numeric_covid_cap$Ethnicity..Y...BAME.)
ggplot(numeric_covid_cap , aes(x = COVID.19.Positive., y = IFNG)) +
  geom_violin() +
  geom_quasirandom(width = 0.2) +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("IFNg (pg/ml)") +stat_compare_means(label.x.npc = 0.4)

ggplot(numeric_covid_cap, aes(x = COVID.19.Positive., y = RBD.IgG..BAU.ml.)) +
  geom_violin() +
  geom_quasirandom(width = 0.2) +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Ant-RBD IgG titres") +
  stat_compare_means(label.x.npc = 0.4)

ggplot(numeric_covid_cap, aes(x = COVID.19.Positive., y = N.IgG..BAU.ml.)) +
  geom_violin() +
  geom_quasirandom(width = 0.2) +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Ant-RBD IgG titres") +
  stat_compare_means(label.x.npc = 0.4)

ggplot(numeric_covid_cap, aes(x = COVID.19.Positive., y = S1.IgG..BAU.ml.)) +
  geom_violin() +
  geom_quasirandom(width = 0.2) +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Ant-S1 IgG titres") +
  stat_compare_means(label.x.npc = 0.4)

ggplot(numeric_covid_cap, aes(x = COVID.19.Positive., y = S2.IgG..BAU.ml.)) +
  geom_violin() +
  geom_quasirandom(width = 0.2) +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Ant-S2 IgG titres") +
  stat_compare_means(label.x.npc = 0.4)

ggplot(numeric_covid_cap , aes(x = COVID.19.Positive., fill = Vaccinated.)) +
  geom_bar(stat ='count',position = "dodge2") +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Count") +
  labs(fill = "Vaccinated") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("brown3", "darkgoldenrod1")) 

ggplot(numeric_covid_cap , aes(x = COVID.19.Positive., fill = Prior.COVID.19.)) +
  geom_bar(stat ='count',position = "dodge2") +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Count") +
  labs(fill = "Prior COVID infection") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("brown3", "darkgoldenrod1")) 

ggplot(numeric_covid_cap , aes(x = COVID.19.Positive., fill = Gender)) +
  geom_bar(stat ='count',position = "dodge2") +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Count") +
  labs(fill = "Gender") +
  scale_fill_manual(labels = c("Male", "Female"), values = c("brown3", "darkgoldenrod1")) 

ggplot(numeric_covid_cap , aes(x = COVID.19.Positive., fill = Significant.co.morbidity.)) +
  geom_bar(stat ='count',position = "dodge2") +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Count") +
  labs(fill = "Significant comorbidity") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("brown3", "darkgoldenrod1")) 

ggplot(numeric_covid_cap , aes(x = COVID.19.Positive., y = Age)) +
    geom_violin() +
  geom_quasirandom(width = 0.2) +
  xlab("Breakthrough COVID19 infection") +
  ylab("Age") +
    scale_x_discrete(labels = c("No", "Yes"))+
  stat_compare_means(label.x.npc = 0.4)

ggplot(numeric_covid_cap , aes(x = COVID.19.Positive., fill = Ethnicity..Y...BAME.)) +
  geom_bar(stat ='count',position = "dodge2") +
  scale_x_discrete(labels = c("No", "Yes")) + 
  xlab("Breakthrough COVID19 infection") +
  ylab("Count") +
  labs(fill = "Ethnicity") +
  scale_fill_manual(labels = c("Non-BAME", "BAME"), values = c("brown3", "darkgoldenrod1")) 

Logistic regression modelling for venous dataset used in Extended Figure 3

library(caret)
## Warning: package 'caret' was built under R version 4.0.5
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(OddsPlotty)
library(ggplot2)
library(OddsPlotty)
library(caret)
library(tidyverse)
library(rms)
## Loading required package: Hmisc
## Loading required package: survival
## Warning: package 'survival' was built under R version 4.0.5
## 
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
## 
##     cluster
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "numericVector" of class "Mnumeric"; definition not updated
numeric_covid_vein <- covid_data_vein[,c(2,3,4,5,6,7,8,9,10,11)]
#Conversion of numeric measurements to quartiles

q1 <- summary(numeric_covid_vein$IFNg)[2]
q2 <- summary(numeric_covid_vein$IFNg)[3]
q3 <- summary(numeric_covid_vein$IFNg)[5]

q1r <- summary(numeric_covid_vein$RBD.IgG..BAU.ml.)[2]
q2r <- summary(numeric_covid_vein$RBD.IgG..BAU.ml.)[3]
q3r <- summary(numeric_covid_vein$RBD.IgG..BAU.ml.)[5]

q1s1 <- summary(numeric_covid_vein$S1.IgG..BAU.ml.)[2]
q2s1<- summary(numeric_covid_vein$S1.IgG..BAU.ml.)[3]
q3s1 <- summary(numeric_covid_vein$S1.IgG..BAU.ml.)[5]

q1s2<- summary(numeric_covid_vein$S2.IgG..BAU.ml.)[2]
q2s2<- summary(numeric_covid_vein$S2.IgG..BAU.ml.)[3]
q3s2 <- summary(numeric_covid_vein$S2.IgG..BAU.ml.)[5]

q1n <- summary(numeric_covid_vein$N.IgG..BAU.ml.)[2]
q2n <- summary(numeric_covid_vein$N.IgG..BAU.ml.)[3]
q3n  <- summary(numeric_covid_vein$N.IgG..BAU.ml.)[5]

numeric_covid_vein[numeric_covid_vein$IFNg < q1,]$IFNg <- 1
numeric_covid_vein[numeric_covid_vein$IFNg >= q1 & numeric_covid_vein$IFNg < q2,]$IFNg <- 2
numeric_covid_vein[numeric_covid_vein$IFNg >= q2 & numeric_covid_vein$IFNg < q3,]$IFNg <- 3
numeric_covid_vein[numeric_covid_vein$IFNg >= q3,]$IFNg <- 4

numeric_covid_vein[numeric_covid_vein$RBD.IgG..BAU.ml. < q1r,]$RBD.IgG..BAU.ml. <- 1
numeric_covid_vein[numeric_covid_vein$RBD.IgG..BAU.ml. >= q1r & numeric_covid_vein$RBD.IgG..BAU.ml. < q2r,]$RBD.IgG..BAU.ml. <- 2
numeric_covid_vein[numeric_covid_vein$RBD.IgG..BAU.ml. >= q2r & numeric_covid_vein$RBD.IgG..BAU.ml. < q3r,]$RBD.IgG..BAU.ml. <- 3
numeric_covid_vein[numeric_covid_vein$RBD.IgG..BAU.ml. >= q3r,]$RBD.IgG..BAU.ml. <- 4

numeric_covid_vein[numeric_covid_vein$S1.IgG..BAU.ml. < q1s1,]$S1.IgG..BAU.ml. <- 1
numeric_covid_vein[numeric_covid_vein$S1.IgG..BAU.ml. >= q1s1 & numeric_covid_vein$S1.IgG..BAU.ml. < q2s1,]$S1.IgG..BAU.ml. <- 2
numeric_covid_vein[numeric_covid_vein$S1.IgG..BAU.ml. >= q2s1 & numeric_covid_vein$S1.IgG..BAU.ml. < q3s1,]$S1.IgG..BAU.ml. <- 3
numeric_covid_vein[numeric_covid_vein$S1.IgG..BAU.ml. >= q3s1,]$S1.IgG..BAU.ml. <- 4

numeric_covid_vein[numeric_covid_vein$S2.IgG..BAU.ml. < q1s2,]$S2.IgG..BAU.ml. <- 1
numeric_covid_vein[numeric_covid_vein$S2.IgG..BAU.ml. >= q1s2 & numeric_covid_vein$S2.IgG..BAU.ml. < q2s2,]$S2.IgG..BAU.ml. <- 2
numeric_covid_vein[numeric_covid_vein$S2.IgG..BAU.ml. >= q2s2 & numeric_covid_vein$S2.IgG..BAU.ml. < q3s2,]$S2.IgG..BAU.ml. <- 3
numeric_covid_vein[numeric_covid_vein$S2.IgG..BAU.ml. >= q3s2,]$S2.IgG..BAU.ml. <- 4

numeric_covid_vein[numeric_covid_vein$N.IgG..BAU.ml. < q1n,]$N.IgG..BAU.ml. <- 1
numeric_covid_vein[numeric_covid_vein$N.IgG..BAU.ml. >= q1n & numeric_covid_vein$N.IgG..BAU.ml. < q2n,]$N.IgG..BAU.ml. <- 2
numeric_covid_vein[numeric_covid_vein$N.IgG..BAU.ml. >= q2n & numeric_covid_vein$N.IgG..BAU.ml. < q3n,]$N.IgG..BAU.ml. <- 3
numeric_covid_vein[numeric_covid_vein$N.IgG..BAU.ml. >= q3n,]$N.IgG..BAU.ml. <- 4

numeric_covid_vein$IFNg <- factor(numeric_covid_vein$IFNg, levels = c(1,2,3,4))
numeric_covid_vein$RBD.IgG..BAU.ml. <- factor(numeric_covid_vein$RBD.IgG..BAU.ml., levels = c(1,2,3,4))
numeric_covid_vein$S1.IgG..BAU.ml. <- factor(numeric_covid_vein$S1.IgG..BAU.ml., levels = c(1,2,3,4))
numeric_covid_vein$S2.IgG..BAU.ml. <- factor(numeric_covid_vein$S2.IgG..BAU.ml., levels = c(1,2,3,4))
numeric_covid_vein$N.IgG..BAU.ml. <- factor(numeric_covid_vein$N.IgG..BAU.ml., levels = c(1,2,3,4))

#Logistic modelling

model_vein <- glm(COVID.19.Positive.~ ., data=numeric_covid_vein, family=binomial(link='logit'))

summary(model_vein)
## 
## Call:
## glm(formula = COVID.19.Positive. ~ ., family = binomial(link = "logit"), 
##     data = numeric_covid_vein)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5694  -0.7026  -0.3230   0.6947   2.5948  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)   
## (Intercept)        1.82794    2.09510   0.872  0.38295   
## IFNg2             -0.63505    0.57320  -1.108  0.26790   
## IFNg3             -1.33151    0.61543  -2.164  0.03050 * 
## IFNg4             -2.32305    0.89946  -2.583  0.00980 **
## RBD.IgG..BAU.ml.2 -0.32378    0.89221  -0.363  0.71669   
## RBD.IgG..BAU.ml.3 -0.20532    1.06644  -0.193  0.84733   
## RBD.IgG..BAU.ml.4 -0.84979    1.04882  -0.810  0.41781   
## S1.IgG..BAU.ml.2   0.24328    1.31260   0.185  0.85296   
## S1.IgG..BAU.ml.3   0.58586    1.66034   0.353  0.72420   
## S1.IgG..BAU.ml.4   1.94404    1.49298   1.302  0.19287   
## S2.IgG..BAU.ml.2  -0.51365    1.04574  -0.491  0.62330   
## S2.IgG..BAU.ml.3  -1.75034    1.27240  -1.376  0.16894   
## S2.IgG..BAU.ml.4  -0.44526    1.26058  -0.353  0.72392   
## N.IgG..BAU.ml.2    0.11950    0.60225   0.198  0.84271   
## N.IgG..BAU.ml.3   -0.72724    0.90697  -0.802  0.42265   
## N.IgG..BAU.ml.4    0.31107    0.86174   0.361  0.71811   
## Vaccinated.       -0.88544    1.86828  -0.474  0.63555   
## Prior.COVID.19.   -2.07995    0.74069  -2.808  0.00498 **
## Gender             0.10982    0.55175   0.199  0.84223   
## Age               -0.01471    0.02119  -0.694  0.48764   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 170.70  on 147  degrees of freedom
## Residual deviance: 126.65  on 128  degrees of freedom
## AIC: 166.65
## 
## Number of Fisher Scoring iterations: 6
#Extraction of odds ratios 
odds <- odds_plot(model_vein)
## Waiting for profiling to be done...
odds <- odds$odds_data
write.csv(odds, file = "...")
odds$OR <- log(odds$OR)
odds$lower <- log(odds$lower)
odds$upper <- log(odds$upper)

#Odds ratio plotting
ggplot(odds, aes(x = OR, y = vars)) + 
    geom_vline(aes(xintercept = 0), size = .25, linetype = "dashed") + 
    geom_errorbarh(aes(xmax = upper, xmin = lower), size = .5, height = 
                    .2, color = "gray50") +
    geom_point(size = 3.5, color = "orange") +
    theme_bw()+
    theme(panel.grid.minor = element_blank()) +
    ylab("") +
    xlab("log(Odds ratio)") +
    ggtitle("Parameters and the risk of COVID infections - Venous cohort") +
  scale_x_continuous(limits = c(-6,6))

# Odd as percentages
(exp(model_vein$coefficients[-1])-1)*100
##             IFNg2             IFNg3             IFNg4 RBD.IgG..BAU.ml.2 
##        -47.009189        -73.592272        -90.202559        -27.658838 
## RBD.IgG..BAU.ml.3 RBD.IgG..BAU.ml.4  S1.IgG..BAU.ml.2  S1.IgG..BAU.ml.3 
##        -18.561367        -57.249403         27.542378         79.653519 
##  S1.IgG..BAU.ml.4  S2.IgG..BAU.ml.2  S2.IgG..BAU.ml.3  S2.IgG..BAU.ml.4 
##        598.695015        -40.169243        -82.628483        -35.934191 
##   N.IgG..BAU.ml.2   N.IgG..BAU.ml.3   N.IgG..BAU.ml.4       Vaccinated. 
##         12.693257        -51.675678         36.488572        -58.746828 
##   Prior.COVID.19.            Gender               Age 
##        -87.506347         11.607703         -1.459982

Logistic regression modelling for capillary dataset used in Extended Figure 3

numeric_covid_cap <- covid_data_cap[,c(2,3,4,5,6,7,8,9,10,11,12,13)]

q1 <- summary(numeric_covid_cap$IFNG)[2]
q2 <- summary(numeric_covid_cap$IFNG)[3]
q3 <- summary(numeric_covid_cap$IFNG)[5]

q1r <- summary(numeric_covid_cap$RBD.IgG..BAU.ml.)[2]
q2r <- summary(numeric_covid_cap$RBD.IgG..BAU.ml.)[3]
q3r <- summary(numeric_covid_cap$RBD.IgG..BAU.ml.)[5]

q1s1 <- summary(numeric_covid_cap$S1.IgG..BAU.ml.)[2]
q2s1<- summary(numeric_covid_cap$S1.IgG..BAU.ml.)[3]
q3s1 <- summary(numeric_covid_cap$S1.IgG..BAU.ml.)[5]

q1s2<- summary(numeric_covid_cap$S2.IgG..BAU.ml.)[2]
q2s2<- summary(numeric_covid_cap$S2.IgG..BAU.ml.)[3]
q3s2 <- summary(numeric_covid_cap$S2.IgG..BAU.ml.)[5]

q1n <- summary(numeric_covid_cap$N.IgG..BAU.ml.)[2]
q2n <- summary(numeric_covid_cap$N.IgG..BAU.ml.)[3]
q3n  <- summary(numeric_covid_cap$N.IgG..BAU.ml.)[5]

numeric_covid_cap[numeric_covid_cap$IFNG < q1,]$IFNG <- 1
numeric_covid_cap[numeric_covid_cap$IFNG >= q1 & numeric_covid_cap$IFNG < q2,]$IFNG <- 2
numeric_covid_cap[numeric_covid_cap$IFNG >= q2 & numeric_covid_cap$IFNG < q3,]$IFNG <- 3
numeric_covid_cap[numeric_covid_cap$IFNG >= q3,]$IFNG <- 4

numeric_covid_cap[numeric_covid_cap$RBD.IgG..BAU.ml. < q1r,]$RBD.IgG..BAU.ml. <- 1
numeric_covid_cap[numeric_covid_cap$RBD.IgG..BAU.ml. >= q1r & numeric_covid_cap$RBD.IgG..BAU.ml. < q2r,]$RBD.IgG..BAU.ml. <- 2
numeric_covid_cap[numeric_covid_cap$RBD.IgG..BAU.ml. >= q2r & numeric_covid_cap$RBD.IgG..BAU.ml. < q3r,]$RBD.IgG..BAU.ml. <- 3
numeric_covid_cap[numeric_covid_cap$RBD.IgG..BAU.ml. >= q3r,]$RBD.IgG..BAU.ml. <- 4

numeric_covid_cap[numeric_covid_cap$S1.IgG..BAU.ml. < q1s1,]$S1.IgG..BAU.ml. <- 1
numeric_covid_cap[numeric_covid_cap$S1.IgG..BAU.ml. >= q1s1 & numeric_covid_cap$S1.IgG..BAU.ml. < q2s1,]$S1.IgG..BAU.ml. <- 2
numeric_covid_cap[numeric_covid_cap$S1.IgG..BAU.ml. >= q2s1 & numeric_covid_cap$S1.IgG..BAU.ml. < q3s1,]$S1.IgG..BAU.ml. <- 3
numeric_covid_cap[numeric_covid_cap$S1.IgG..BAU.ml. >= q3s1,]$S1.IgG..BAU.ml. <- 4

numeric_covid_cap[numeric_covid_cap$S2.IgG..BAU.ml. < q1s2,]$S2.IgG..BAU.ml. <- 1
numeric_covid_cap[numeric_covid_cap$S2.IgG..BAU.ml. >= q1s2 & numeric_covid_cap$S2.IgG..BAU.ml. < q2s2,]$S2.IgG..BAU.ml. <- 2
numeric_covid_cap[numeric_covid_cap$S2.IgG..BAU.ml. >= q2s2 & numeric_covid_cap$S2.IgG..BAU.ml. < q3s2,]$S2.IgG..BAU.ml. <- 3
numeric_covid_cap[numeric_covid_cap$S2.IgG..BAU.ml. >= q3s2,]$S2.IgG..BAU.ml. <- 4

numeric_covid_cap[numeric_covid_cap$N.IgG..BAU.ml. < q1n,]$N.IgG..BAU.ml. <- 1
numeric_covid_cap[numeric_covid_cap$N.IgG..BAU.ml. >= q1n & numeric_covid_cap$N.IgG..BAU.ml. < q2n,]$N.IgG..BAU.ml. <- 2
numeric_covid_cap[numeric_covid_cap$N.IgG..BAU.ml. >= q2n & numeric_covid_cap$N.IgG..BAU.ml. < q3n,]$N.IgG..BAU.ml. <- 3
numeric_covid_cap[numeric_covid_cap$N.IgG..BAU.ml. >= q3n,]$N.IgG..BAU.ml. <- 4

numeric_covid_cap$IFNG <- factor(numeric_covid_cap$IFNG, levels = c(1,2,3,4))
numeric_covid_cap$RBD.IgG..BAU.ml. <- factor(numeric_covid_cap$RBD.IgG..BAU.ml., levels = c(1,2,3,4))
numeric_covid_cap$S1.IgG..BAU.ml. <- factor(numeric_covid_cap$S1.IgG..BAU.ml., levels = c(1,2,3,4))
numeric_covid_cap$S2.IgG..BAU.ml. <- factor(numeric_covid_cap$S2.IgG..BAU.ml., levels = c(1,2,3,4))
numeric_covid_cap$N.IgG..BAU.ml. <- factor(numeric_covid_cap$N.IgG..BAU.ml., levels = c(1,2,3,4))


model_cap <- glm(COVID.19.Positive.~ ., data=numeric_covid_cap, family=binomial(link='logit'))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(model_cap)
## 
## Call:
## glm(formula = COVID.19.Positive. ~ ., family = binomial(link = "logit"), 
##     data = numeric_covid_cap)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3438  -0.3609  -0.1834  -0.0102   2.5354  
## 
## Coefficients:
##                             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                  1.41968    1.16957   1.214  0.22481   
## IFNG2                       -1.26515    0.63260  -2.000  0.04551 * 
## IFNG3                       -1.24362    0.91277  -1.362  0.17305   
## IFNG4                       -1.85756    0.79354  -2.341  0.01924 * 
## RBD.IgG..BAU.ml.2           -3.09864    1.62364  -1.908  0.05633 . 
## RBD.IgG..BAU.ml.3          -20.08293 1498.33433  -0.013  0.98931   
## RBD.IgG..BAU.ml.4           -4.08014    1.69793  -2.403  0.01626 * 
## N.IgG..BAU.ml.3             -0.37318    0.65954  -0.566  0.57151   
## N.IgG..BAU.ml.4             -0.40601    1.02530  -0.396  0.69211   
## S1.IgG..BAU.ml.2             2.70203    1.57949   1.711  0.08714 . 
## S1.IgG..BAU.ml.3             4.93285    2.18612   2.256  0.02404 * 
## S1.IgG..BAU.ml.4             5.75011    2.04256   2.815  0.00488 **
## S2.IgG..BAU.ml.2             0.12257    0.91886   0.133  0.89388   
## S2.IgG..BAU.ml.3             0.21788    1.03034   0.211  0.83252   
## S2.IgG..BAU.ml.4            -1.05224    1.09013  -0.965  0.33442   
## Vaccinated.                 -1.22246    1.06418  -1.149  0.25066   
## Prior.COVID.19.             -0.23531    0.73934  -0.318  0.75028   
## Gender                       0.33978    0.51908   0.655  0.51274   
## Significant.co.morbidity.   -0.14830    0.74724  -0.198  0.84269   
## Age                         -0.06005    0.02111  -2.845  0.00444 **
## Ethnicity..Y...BAME.       -15.95561 2190.97919  -0.007  0.99419   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 173.65  on 340  degrees of freedom
## Residual deviance: 126.65  on 320  degrees of freedom
## AIC: 168.65
## 
## Number of Fisher Scoring iterations: 18
odds <- odds_plot(model_cap)
## Waiting for profiling to be done...
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
odds <- odds$odds_data
odds$OR <- log(odds$OR)
odds$lower <- log(odds$lower)
odds$upper <- log(odds$upper)


ggplot(odds, aes(x = OR, y = vars)) + 
    geom_vline(aes(xintercept = 0), size = .25, linetype = "dashed") + 
    geom_errorbarh(aes(xmax = upper, xmin = lower), size = .5, height = 
                    .2, color = "gray50") +
    geom_point(size = 3.5, color = "orange") +
    theme_bw()+
    theme(panel.grid.minor = element_blank()) +
    ylab("") +
    xlab("log(Odds ratio)") +
    ggtitle("Parameters and the risk of COVID infections - capillary dataset") +
  scale_x_continuous(limits = c(-10,1))
## Warning: Removed 12 rows containing missing values (geom_errorbarh).
## Warning: Removed 5 rows containing missing values (geom_point).

# Odd as percentages
(exp(model_cap$coefficients[-1])-1)*100
##                     IFNG2                     IFNG3                     IFNG4 
##                -71.780173                -71.166040                -84.394723 
##         RBD.IgG..BAU.ml.2         RBD.IgG..BAU.ml.3         RBD.IgG..BAU.ml.4 
##                -95.488967               -100.000000                -98.309492 
##           N.IgG..BAU.ml.3           N.IgG..BAU.ml.4          S1.IgG..BAU.ml.2 
##                -31.145944                -33.369762               1390.994030 
##          S1.IgG..BAU.ml.3          S1.IgG..BAU.ml.4          S2.IgG..BAU.ml.2 
##              13777.485854              31322.388855                 13.039624 
##          S2.IgG..BAU.ml.3          S2.IgG..BAU.ml.4               Vaccinated. 
##                 24.344084                -65.084452                -70.549661 
##           Prior.COVID.19.                    Gender Significant.co.morbidity. 
##                -20.967087                 40.463377                -13.782506 
##                       Age      Ethnicity..Y...BAME. 
##                 -5.828623                -99.999988

Cross-validation of logisitc regression model performed on venous dataset using bestglm

library(bestglm)
## Loading required package: leaps
#numeric_covid_vein 

numeric_covid_vein$COVID.19.Positive. <- factor(numeric_covid_vein$COVID.19.Positive., levels = c(0,1))


test_bestglm <-numeric_covid_vein[, c(names(numeric_covid_vein)[-1], "COVID.19.Positive.")]
names(test_bestglm)[ncol(test_bestglm)] <- "y"

test_bestglm$Gender <- as.numeric(test_bestglm$Gender)
test_bestglm$IFNg <- as.factor(test_bestglm$IFNg)
test_bestglm$RBD.IgG..BAU.ml. <- as.factor(test_bestglm$RBD.IgG..BAU.ml.)
test_bestglm$S1.IgG..BAU.ml. <- as.factor(test_bestglm$S1.IgG..BAU.ml.)
test_bestglm$S2.IgG..BAU.ml. <- as.factor(test_bestglm$S2.IgG..BAU.ml.)
test_bestglm$N.IgG..BAU.ml. <- as.factor(test_bestglm$N.IgG..BAU.ml.)

test_bestglm <-
    bestglm(Xy = test_bestglm,
            family = binomial,
            IC = "AIC",                 # Information criteria for
            method = "exhaustive")
## Morgan-Tatar search since family is non-gaussian.
## Note: factors present with more than 2 levels.
summary(test_bestglm$BestModel)
## 
## Call:
## glm(formula = y ~ ., family = family, data = Xi, weights = weights)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3095  -0.8769  -0.3671   1.0508   2.8066  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       0.3053     0.3823   0.799 0.424479    
## IFNg2            -0.4453     0.5160  -0.863 0.388151    
## IFNg3            -1.0627     0.5545  -1.916 0.055304 .  
## IFNg4            -2.3179     0.8262  -2.806 0.005022 ** 
## Prior.COVID.19.  -1.9063     0.5335  -3.573 0.000353 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 170.70  on 147  degrees of freedom
## Residual deviance: 136.38  on 143  degrees of freedom
## AIC: 146.38
## 
## Number of Fisher Scoring iterations: 5
odds <- odds_plot(test_bestglm$BestModel)
## Waiting for profiling to be done...
odds <- odds$odds_data
write.csv(odds, file = "/Users/lorenzocapitani/OneDrive\ -\ Cardiff\ University/Bioinformatics/MartinPaper/odds_final.csv")
odds$OR <- log(odds$OR)
odds$lower <- log(odds$lower)
odds$upper <- log(odds$upper)


ggplot(odds, aes(x = OR, y = vars)) + 
    geom_vline(aes(xintercept = 0), size = .25, linetype = "dashed") + 
    geom_errorbarh(aes(xmax = upper, xmin = lower), size = .5, height = 
                    .2, color = "gray50") +
    geom_point(size = 3.5, color = "orange") +
    theme_bw()+
    theme(panel.grid.minor = element_blank()) +
    ylab("") +
    xlab("log(Odds ratio)") +
    ggtitle("Parameters and the risk of COVID infections - Venous cohort") +
  scale_x_continuous(limits = c(-6,6))

# Odd as percentages
(exp(test_bestglm$BestModel$coefficients[-1])-1)*100
##           IFNg2           IFNg3           IFNg4 Prior.COVID.19. 
##       -35.93574       -65.44868       -90.15216       -85.13675